In [1]:
import pandas as pd
In [2]:
from sklearn import tree
In [3]:
# Minimal toy training set: two samples with two features each,
# plus one class label per sample.
X, y = [[0, 0], [1, 2]], [0, 1]
In [4]:
# Seed the estimator: on the toy data both features separate the two samples
# equally well, and scikit-learn picks among tied splits at random, so without
# a fixed random_state the predictions shown below can change between runs.
clf = tree.DecisionTreeClassifier(random_state=42)
In [5]:
# Train on the toy samples; the result of fit() is bound back to the same name.
clf = clf.fit(X, y)
In [6]:
# Predicted class for a single new sample.
clf.predict([[2.0, 2.0]])
Out[6]:
In [7]:
# Per-class probabilities for the same sample.
clf.predict_proba([[2.0, 2.0]])
Out[7]:
In [8]:
# Predicted class for a point lying between the two training samples.
clf.predict([[0.4, 1.2]])
Out[8]:
In [9]:
# Per-class probabilities for the point lying between the two training samples.
clf.predict_proba([[0.4, 1.2]])
Out[9]:
In [10]:
# Per-class probabilities for a point close to the first training sample [0, 0].
clf.predict_proba([[0, 0.2]])
Out[10]:
`DecisionTreeClassifier` is capable of both binary classification (where the labels are [-1, 1]) and multiclass classification (where the labels are [0, …, K-1]).
In [11]:
from sklearn.datasets import load_iris
from sklearn import tree
# Load the iris dataset object; its .data, .target, .feature_names and
# .target_names attributes are used in the cells below.
iris = load_iris()
In [12]:
# Preview the first five rows of the feature matrix.
iris.data[:5]
Out[12]:
In [13]:
# Show the names of the feature columns.
iris.feature_names
Out[13]:
In [14]:
# Keep every row but only the columns from index 2 onward; the matching
# slice feature_names[2:] is used to label the exported tree further down.
X = iris.data[:, 2:]
In [15]:
# Target vector for the classifier.
y = iris.target
In [16]:
# Display the target vector to inspect its values.
y
Out[16]:
In [17]:
# New classifier for the iris data; random_state is fixed so repeated fits give the same tree.
clf = tree.DecisionTreeClassifier(random_state=42)
In [18]:
# Fit on the two selected iris feature columns and the target vector.
clf = clf.fit(X, y)
In [19]:
from sklearn.tree import export_graphviz
In [20]:
# Write the fitted tree to "tree.dot" in Graphviz DOT format, labelling nodes
# with the two selected feature names and the iris class names.
export_graphviz(
    clf,
    out_file="tree.dot",
    feature_names=iris.feature_names[2:],
    class_names=iris.target_names,
    filled=True,
    rounded=True,
)
In [21]:
import graphviz
In [22]:
# Same export as the file-based call, but with out_file=None so the DOT
# source comes back as the return value instead of going to disk.
dot_data = tree.export_graphviz(
    clf,
    out_file=None,
    feature_names=iris.feature_names[2:],
    class_names=iris.target_names,
    filled=True,
    rounded=True,
)
In [23]:
# Wrap the DOT source in a graphviz.Source object. The cell ends with an
# assignment, so nothing is displayed here.
graph = graphviz.Source(dot_data)
In [24]:
import numpy as np
import seaborn as sns
sns.set_style('whitegrid')
import matplotlib.pyplot as plt
%matplotlib inline
In [25]:
# Load the iris table through seaborn and preview its first rows.
df = sns.load_dataset('iris')
df.head()
Out[25]:
In [26]:
# Feature table restricted to the two petal measurements.
col = ['petal_length', 'petal_width']
X = df[col]
In [27]:
# Encode each species name as an integer class id. The encoded values are
# stored on df as the 'tmp' column and exposed as the target vector y.
species_to_num = dict(setosa=0, versicolor=1, virginica=2)
df['tmp'] = df['species'].map(species_to_num)
y = df['tmp']
In [28]:
# Fit a fresh classifier on the petal features. random_state is fixed (as in
# the earlier iris example) because scikit-learn chooses among equally good
# splits at random; without a seed the decision regions plotted below could
# differ from one run to the next.
clf = tree.DecisionTreeClassifier(random_state=42)
clf = clf.fit(X, y)
In [29]:
# Preview the first five rows of the feature table.
X.iloc[:5]
Out[29]:
In [30]:
# Show the feature table as a plain NumPy array.
X.values
Out[30]:
In [31]:
# Flatten the feature values into a single column (one value per row) for inspection.
X.values.reshape(-1,1)
Out[31]:
In [32]:
# Single-column array holding every value of both feature columns.
Xv = X.values.reshape(-1,1)
In [33]:
# Display the flattened single-column array.
Xv
Out[33]:
In [34]:
h = 0.02 # step between neighbouring grid points; the same value is used for both axes of the mesh
In [35]:
# Smallest value across both feature columns.
Xv.min()
Out[35]:
In [36]:
# Largest value across both feature columns, plus a margin of one.
Xv.max() + 1
Out[36]:
In [37]:
# Horizontal extent of the prediction grid. The x axis of the plots is the
# first feature column (X.values[:, 0]), so the bounds are taken from that
# column alone rather than from both feature columns mixed together.
# The +1 on the upper bound leaves a margin to the right of the data.
x_min, x_max = X.values[:, 0].min(), X.values[:, 0].max() + 1
In [38]:
# Smallest encoded class label.
y.min()
Out[38]:
In [39]:
# Largest encoded class label, plus one.
y.max() + 1
Out[39]:
In [40]:
# Vertical extent of the prediction grid. The y axis of the plots is the
# second feature column (X.values[:, 1]), not the class labels held in `y`,
# so the bounds are taken from that feature column.
# The +1 on the upper bound leaves a margin above the data.
y_min, y_max = X.values[:, 1].min(), X.values[:, 1].max() + 1
In [41]:
# Check the lower bound used for the grid's second axis.
y_min
Out[41]:
In [42]:
# Check the upper bound used for the grid's second axis.
y_max
Out[42]:
In [43]:
# Evenly spaced coordinates along the first axis, from x_min up to (not including) x_max in steps of h.
np.arange(x_min, x_max, h)
Out[43]:
In [44]:
# Evenly spaced coordinates along the second axis, from y_min up to (not including) y_max in steps of h.
np.arange(y_min, y_max, h)
Out[44]:
In [45]:
# Preview the pair of coordinate matrices built from the two axis ranges.
np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Out[45]:
In [46]:
# Build the coordinate matrices of the prediction grid: xx holds the
# first-axis coordinate of every grid point, yy the second-axis coordinate.
x_ticks = np.arange(x_min, x_max, h)
y_ticks = np.arange(y_min, y_max, h)
xx, yy = np.meshgrid(x_ticks, y_ticks)
In [47]:
# Display the first-axis coordinate matrix of the grid.
xx
Out[47]:
In [48]:
# Display the second-axis coordinate matrix of the grid.
yy
Out[48]:
In [49]:
# Flatten the first-axis coordinate matrix into a 1-D array.
xx.ravel()
Out[49]:
In [50]:
xx.ravel?
In [51]:
# Flatten the second-axis coordinate matrix into a 1-D array.
yy.ravel()
Out[51]:
In [52]:
# Pair the flattened coordinates column-wise: one row per grid point, two columns.
np.c_[xx.ravel(), yy.ravel()]
Out[52]:
In [53]:
np.c_?
In [54]:
# View the grid points as a table (one row per point).
pd.DataFrame(np.c_[xx.ravel(), yy.ravel()])
Out[54]:
In [55]:
# Predict a class for every grid point. The classifier was fitted on the
# DataFrame X, so the grid is wrapped in a DataFrame carrying the same column
# names; passing a bare ndarray makes recent scikit-learn versions warn that
# the input lacks the feature names seen during fit.
grid_points = pd.DataFrame(np.c_[xx.ravel(), yy.ravel()], columns=X.columns)
z = clf.predict(grid_points)
In [56]:
# Display the predicted class of each grid point.
z
Out[56]:
In [57]:
# Shape of the coordinate grid; the predictions are reshaped to match it below.
xx.shape
Out[57]:
In [58]:
# Shape of the prediction array before it is reshaped to the grid.
z.shape
Out[58]:
In [59]:
# Give the predictions the same 2-D layout as the coordinate grid so they can be passed to contourf together with xx and yy.
z = z.reshape(xx.shape)
In [60]:
# Confirm the prediction array's shape after the reshape.
z.shape
Out[60]:
In [61]:
plt.contourf?
In [62]:
# Decision regions only: filled contours of the predicted class over the grid.
fig = plt.figure(figsize=(16, 10))
ax = plt.contourf(xx, yy, z, alpha=0.3, cmap='afmhot');
In [63]:
# Data points only: first feature on the x axis, second on the y axis,
# coloured by class label.
fig = plt.figure(figsize=(16, 10))
plt.scatter(
    X.values[:, 0],
    X.values[:, 1],
    c=y,
    s=80,
    alpha=0.9,
    edgecolors='g',
);
In [64]:
# Final figure: predicted decision regions with the data points drawn on top.
fig = plt.figure(figsize=(16, 10))
ax = plt.contourf(xx, yy, z, cmap='afmhot', alpha=0.3)
plt.scatter(X.values[:, 0], X.values[:, 1], c=y, s=80,
            alpha=0.9, edgecolors='g')
# Label the axes with the feature column names and add a title so the figure
# can be read without the surrounding cells.
plt.xlabel(X.columns[0])
plt.ylabel(X.columns[1])
plt.title('Decision tree decision regions over the two selected features');